import hashlib
import os
import sys
import threading
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from PyQt5.QtCore import pyqtSignal, pyqtSlot
from PyQt5.QtGui import QStandardItemModel, QStandardItem, QPixmap, QIcon
from PyQt5.QtWidgets import QWidget, QApplication, QFileDialog

from ImageParserForm import Ui_Form

# Browser-like HTTP request headers shared by the requests.get() calls in this file.
header = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.102 Safari/537.36'
    ),
}

def download(args):
    """Download one image, save it to disk and add it to the UI list.

    The file is stored in ``parser.get_dir()`` under a name made of the MD5
    hex digest of the URL plus the URL's extension, so a URL always maps to
    the same file. If that file already exists nothing is downloaded.

    Args:
        args: A ``(src, parser)`` pair. ``src`` is the image URL; ``parser``
            is the object that provides ``get_dir()``, ``ui.addItem()`` and
            ``down_and_notify()``.

    Any exception is printed rather than raised, and
    ``parser.down_and_notify()`` is always called exactly once so the
    parser's pending-image counter stays consistent.
    """
    src, parser = args[0], args[1]
    try:
        digest = hashlib.md5(src.encode("utf-8")).hexdigest()
        filename = digest + os.path.splitext(src)[1]
        fp = '%s/%s' % (parser.get_dir(), filename)
        path = Path(fp)
        if not path.is_file():
            print('download:%s' % src)
            # `headers=` must be passed by keyword: the second positional
            # argument of requests.get() is `params` (the query string).
            # The timeout keeps a stalled server from blocking a worker forever.
            resp = requests.get(src, headers=header, timeout=30)
            # Do not store an HTTP error page under an image file name.
            resp.raise_for_status()
            # The target directory may not exist yet.
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(fp, 'wb') as f:
                f.write(resp.content)
            # NOTE(review): Qt documents QPixmap as GUI-thread-only; if this
            # function runs on a pool thread, consider decoding with QImage.
            img = QPixmap()
            img.loadFromData(resp.content)
            item = QStandardItem(src)
            item.setIcon(QIcon(img))
            parser.ui.addItem(item)
    except Exception as e:
        # Best effort: one failed image must not stop the crawl.
        print(e)
    finally:
        parser.down_and_notify()


class ImageParser(threading.Thread):
    """Worker thread that crawls pages of one site and downloads their images.

    Starting from the URL entered in the UI, each page is fetched, its
    same-site links are queued in ``urlSet`` and its images are handed to a
    small thread pool. The crawl waits until every image of the current page
    has been handled before moving on to the next page.

    NOTE(review): ``run`` calls ``ui.set_loading_status`` and ``start_parse``
    calls ``ui.get_url`` / ``ui.get_dir`` from this thread, not the GUI
    thread; the ``ui`` object has to tolerate that.
    """

    # Seconds to wait for a page before giving up on it.
    REQUEST_TIMEOUT = 30

    def __init__(self, ui):
        """Create an idle parser bound to ``ui`` (queried for URL and directory)."""
        super().__init__()
        self.ui = ui
        self.dir = None
        self.pool = ThreadPoolExecutor(max_workers=3)
        self.loadingStatus = False
        self.urlSet = set()       # pages still to visit
        self.doneUrlSet = set()   # pages already taken from urlSet
        self.doneImgSet = set()   # image URLs already submitted for download
        self.domain = None        # urlparse() result of the start URL
        self.imgNum = 0           # images of the current page not yet handled
        self.condition = threading.Condition()

    def parse(self, url):
        """Fetch ``url`` and process its links and images.

        Raises whatever ``requests`` or the decoding step raises; the caller
        (``start_parse``) catches and prints it.
        """
        print('parse:%s' % url)
        # `headers=` by keyword: the second positional argument of
        # requests.get() is `params`, which would put the headers in the URL.
        req = requests.get(url, headers=header, timeout=self.REQUEST_TIMEOUT)
        if req.encoding == 'ISO-8859-1':
            # requests falls back to ISO-8859-1 when the server sends no
            # charset; prefer the one declared in the page, else a guess.
            encodings = requests.utils.get_encodings_from_content(req.text)
            encoding = encodings[0] if encodings else req.apparent_encoding
            markup = req.content.decode(encoding or 'utf-8', 'replace')
        else:
            markup = req.text
        # Pass decoded text, not re-encoded bytes: with bytes BeautifulSoup
        # would try the page's original <meta charset> against UTF-8 data.
        soup = BeautifulSoup(markup, 'html.parser')

        self.parse_links(soup)
        self.parse_imgs(soup)

    def parse_links(self, doc):
        """Queue every not-yet-seen same-site link found in ``doc``.

        Links are normalised by dropping the query string and one trailing
        slash. Relative links are resolved against the site root.
        """
        for link in doc.find_all('a'):
            href = link.attrs.get('href')
            if href is None:
                continue
            if 'http' not in href:
                if 'javascript' in href:
                    continue
                # Strip leading slashes so "/page" does not become "host//page".
                href = '%s://%s/%s' % (
                    self.domain.scheme, self.domain.netloc, href.lstrip('/'))
            elif self.domain.netloc not in href:
                continue
            href = href.split('?')[0]
            if href.endswith('/'):
                href = href[:-1]
            if not href:
                # e.g. href="?next=http://..." has nothing before the '?'.
                continue
            if href not in self.urlSet and href not in self.doneUrlSet:
                print(href)
                self.urlSet.add(href)

    def parse_imgs(self, doc):
        """Submit the images of ``doc`` for download.

        ``imgNum`` is set to the number of ``<img>`` tags; every tag then
        accounts for exactly one decrement, either here (no usable ``src``,
        or already downloaded) or in ``download`` via ``down_and_notify``.
        """
        imgs = doc.find_all('img')
        with self.condition:
            self.imgNum = len(imgs)
        print('imgNum:%d' % self.imgNum)
        if not (self.imgNum > 0 and self.loadingStatus):
            return
        for img in imgs:
            # A tag without `src` must still be counted down, otherwise the
            # crawl would wait forever for it.
            src = img.attrs.get('src', '').split('?')[0]
            if 'http' in src and src not in self.doneImgSet:
                self.doneImgSet.add(src)
                # download() takes one parameter named `args`.
                self.pool.submit(download, args=(src, self))
            else:
                self.down_and_notify()

    def down_and_notify(self):
        """Mark one image as handled and wake the crawl when none are left."""
        with self.condition:
            self.imgNum -= 1
            if self.imgNum <= 0:
                self.condition.notify()

    def run(self):
        """Thread entry point: crawl, keeping the UI's loading state in sync."""
        self.ui.set_loading_status(True)
        try:
            self.start_parse()
        finally:
            # Re-enable the UI even if the crawl dies unexpectedly.
            self.ui.set_loading_status(False)

    def start_parse(self):
        """Crawl from the UI's URL until the queue is empty or a stop is requested."""
        url = self.ui.get_url()
        if url.endswith('/'):
            url = url[:-1]
        self.dir = self.ui.get_dir()
        self.domain = urlparse(url)

        self.urlSet.add(url)
        self.set_loading_status(True)
        while self.urlSet and self.loadingStatus:
            current = self.urlSet.pop()
            # Remember the page so links back to it are not queued again.
            self.doneUrlSet.add(current)
            try:
                self.parse(current)
                with self.condition:
                    # Loop rather than a single wait(): Condition.wait() may
                    # return without the predicate being true.
                    while self.loadingStatus and self.imgNum > 0:
                        self.condition.wait()
            except Exception as e:
                # Best effort: a broken page must not end the whole crawl.
                print(e)
        self.set_loading_status(False)
        print('finish parse')

    def set_loading_status(self, status):
        """Set the flag that keeps the crawl loop running."""
        self.loadingStatus = status

    def stop_parse(self):
        """Request the crawl to stop and wake it if it is waiting for images."""
        self.set_loading_status(False)
        with self.condition:
            self.condition.notify()

    def get_dir(self):
        """Return the download directory captured by ``start_parse``."""
        return self.dir


class ImageParserWin(QWidget, Ui_Form):
    """Main window: collects the URL and target directory and lists downloaded images.

    ``addItem`` and ``set_loading_status`` are called from the crawler's
    worker threads. Qt widgets and models may only be touched from the GUI
    thread, so both methods just emit a signal; Qt queues the emission and
    runs the connected slot on the thread this window lives in.
    """

    # Internal signals used to hand work over to the GUI thread.
    itemReady = pyqtSignal(object)
    loadingStatusChanged = pyqtSignal(bool)

    def __init__(self):
        """Build the form, wire the buttons and start with an empty list."""
        super().__init__()
        self.setupUi(self)
        self.dirLineEdit.setText('D:/pyimages')
        self.choosePushButton.clicked.connect(self.choose_dir)
        self.startPushButton.clicked.connect(self.start)
        self.stopPushButton.clicked.connect(self.stop)
        self.stopPushButton.setEnabled(False)
        self.model = QStandardItemModel()
        self.listView.setModel(self.model)
        self.parser = None
        # Kept for backward compatibility; model updates are now serialised
        # by the GUI thread's event loop instead of this lock.
        self.lock = threading.Lock()
        self.itemReady.connect(self._append_item)
        self.loadingStatusChanged.connect(self._apply_loading_status)

    def get_url(self):
        """Return the text of the address field."""
        return self.addrLineEdit.text()

    def get_dir(self):
        """Return the text of the directory field."""
        return self.dirLineEdit.text()

    def choose_dir(self):
        """Let the user pick the target directory."""
        chosen = QFileDialog.getExistingDirectory(None, '存放目录', ".")
        # An empty string means the dialog was cancelled: keep the old value.
        if chosen:
            self.dirLineEdit.setText(chosen)

    def start(self):
        """Create a new crawler thread and start it."""
        self.parser = ImageParser(self)
        self.parser.start()

    def set_loading_status(self, status):
        """Enable/disable the start and stop buttons; safe to call from any thread."""
        self.loadingStatusChanged.emit(bool(status))

    def stop(self):
        """Ask the current crawler, if any, to stop."""
        if self.parser is not None:
            self.parser.stop_parse()

    def addItem(self, item):
        """Append ``item`` to the list model; safe to call from any thread."""
        self.itemReady.emit(item)

    def closeEvent(self, QCloseEvent):
        """Stop a running crawl when the window is closed."""
        if self.parser is not None:
            self.stop()

    @pyqtSlot(object)
    def _append_item(self, item):
        """Slot for ``itemReady``: append the item to the model."""
        self.model.appendRow(item)

    @pyqtSlot(bool)
    def _apply_loading_status(self, status):
        """Slot for ``loadingStatusChanged``: toggle the two buttons."""
        self.startPushButton.setEnabled(not status)
        self.stopPushButton.setEnabled(status)


if __name__ == '__main__':
    # Script entry point: show the main window and hand control to Qt's
    # event loop; its return value becomes the process exit code.
    application = QApplication(sys.argv)
    window = ImageParserWin()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)
